library(data.table)
library(tidyr)
# Load the Wave 5 data from its .rds file
WV5_data <- readRDS(
  file = "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/F00007944-WV5_Data_R_v20180912.rds"
)
# Coerce the loaded object to a plain data.frame
WV5_data_df <- as.data.frame(x = WV5_data)
# Preview: first rows of the first five columns
head(WV5_data_df[, seq_len(5)])
library(dplyr)
# Rename the Wave 5 variables to readable names.
# V86 is named risk_and_adventure (not risk) because every later step -- the
# exclusion filter at the end of this block, the Wave 6 rename, and the plots --
# refers to the risk item by that name.
WV5_data <- WV5_data_df %>%
  rename(
    sex = V235, age = V237, country = V2, wave = V1,
    family_important = V4, friends_important = V5, leisure_time = V6,
    happiness = V10, health = V11, satisfaction = V22, freedom = V46,
    marital_status = V55, children = V56, creativity = V80, money = V81,
    security = V82, goodtime = V83, help = V84, success = V85,
    risk_and_adventure = V86, proper = V87, environment = V88,
    tradition = V89, employment = V241, education = V238
  )
WV5_data
# Keep only the variables of interest
WV5_data <- WV5_data %>%
  select(
    sex, age, country, wave, family_important, leisure_time, happiness,
    health, satisfaction, marital_status, children, creativity, money,
    security, goodtime, help, success, risk_and_adventure, proper,
    environment, tradition, employment, education
  )
WV5_data
# Exclude participants with no info about risk, sex and age (non-positive
# codes are dropped). The result is written back to WV5_data, the object the
# following steps use, so the exclusion actually takes effect.
# NOTE(review): employment, marital status and children are not filtered
# here; add conditions for them if they should be.
WV5_data <- subset(WV5_data, risk_and_adventure > 0 & sex > 0 & age > 0)
# Translate the numeric country codes into country names via a lookup table
countrynames <- read.csv(
  "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countrynames.txt",
  header = FALSE,
  as.is = TRUE
)
colnames(countrynames) <- c("code", "name")
WV5_data[["country_lab"]] <-
  countrynames[["name"]][match(WV5_data[["country"]], countrynames[["code"]])]
# Number of respondents per country
table(WV5_data$country_lab)
Andorra Argentina Australia Brazil Bulgaria Burkina Faso Canada Chile
1003 1002 1421 1500 1001 1534 2164 1000
China Colombia Cyprus (G) Egypt Ethiopia Finland France Georgia
1991 3025 1050 3051 1500 1014 1001 1500
Germany Ghana Great Britain Guatemala Hong Kong Hungary India Indonesia
2064 1534 1041 1000 1252 1007 2001 2015
Iran Iraq Italy Japan Jordan Malaysia Mali Mexico
2667 2701 1012 1096 1200 1201 1534 1560
Moldova Morocco Netherlands New Zealand Norway Peru Poland Romania
1046 1200 1050 954 1025 1500 1000 1776
Russia Rwanda Slovenia South Africa South Korea Spain Sweden Switzerland
2033 1507 1037 2988 1200 1200 1003 1241
Taiwan Thailand Trinidad and Tobago Turkey Ukraine United States Uruguay Viet Nam
1227 1534 1002 1346 1000 1249 1000 1495
Zambia
1500
# Print the Wave 5 data, which now also carries the country_lab column
WV5_data
NA
NA
# Read dataset (Wave 6)
# load() restores the saved objects into the workspace under their own names;
# the object WV6_Data_R_v20201117 used below is expected to come from this file.
load("/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/WV6_Data_R_v20201117.rdata")
WV6_data <- WV6_Data_R_v20201117
print(WV6_data)
```{r}
#rename variables
# Give the Wave 6 variables readable names, then keep only the variables of
# interest, in a single pipeline.
# NOTE(review): education is renamed here but not selected afterwards; confirm
# against the Wave 6 codebook that V237 is the intended item before using it.
WV6_data <- WV6_data %>%
  rename(
    wave = V1,
    risk_and_adventure = V76,
    sex = V240,
    age = V242,
    education = V237,
    country = V2
  ) %>%
  select(risk_and_adventure, sex, age, country, wave)
WV6_data
NA
# Decode dataset (Wave 6): map numeric country codes to country names
countrynames <- read.csv(
  "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countrynames.txt",
  header = FALSE,
  as.is = TRUE
)
colnames(countrynames) <- c("code", "name")
WV6_data[["country_lab"]] <-
  countrynames[["name"]][match(WV6_data[["country"]], countrynames[["code"]])]
# Number of respondents per country
table(WV6_data$country_lab)
Algeria Argentina Armenia Australia Azerbaijan Belarus Brazil
1200 1030 1100 1477 1002 1535 1486
Chile China Colombia Cyprus (G) Ecuador Egypt Estonia
1000 2300 1512 1000 1202 1523 1533
Georgia Germany Ghana Haiti Hong Kong India Iraq
1202 2046 1552 1996 1000 4078 1200
Japan Jordan Kazakhstan Kuwait Kyrgyzstan Lebanon Libya
2443 1200 1500 1303 1500 1200 2131
Malaysia Mexico Morocco Netherlands New Zealand Nigeria Pakistan
1300 2000 1200 1902 841 1759 1200
Palestine Peru Philippines Poland Qatar Romania Russia
1000 1210 1200 966 1060 1503 2500
Rwanda Singapore Slovenia South Africa South Korea Spain Sweden
1527 1972 1069 3531 1200 1189 1206
Taiwan Thailand Trinidad and Tobago Tunisia Turkey Ukraine United States
1238 1200 999 1205 1605 1500 2232
Uruguay Uzbekistan Yemen Zimbabwe
1000 1500 1000 1500
WV6_data
# Exclude participants with no info about risk, sex, and age
WV6_data <- subset(WV6_data, risk_and_adventure > 0 & sex > 0 & age > 0)
# Combine the two datasets (Wave 5 + Wave 6).
# rbind() on data frames requires both inputs to have the same columns, so both
# waves are first restricted to the columns they share (kept in Wave 6 order).
shared_cols <- intersect(names(WV6_data), names(WV5_data))
# Stop with a clear message if a column the later steps rely on is not present
# in both waves, instead of failing further down.
required_cols <- c("risk_and_adventure", "sex", "age", "country_lab")
missing_cols <- setdiff(required_cols, shared_cols)
if (length(missing_cols) > 0) {
  stop(
    "Columns not present in both waves: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
data <- rbind(WV5_data[shared_cols], WV6_data[shared_cols])
data
# How many distinct countries are in the combined data?
length(unique(data[["country_lab"]]))
[1] 80
# Total number of participants (rows of the combined data)
dim(data)[1L]
[1] 170195
# Exclusion of participants: keep rows where risk, sex and age are all positive.
# which() drops rows where the condition is NA, as subset() does.
data <- data[
  which(data$risk_and_adventure > 0 & data$sex > 0 & data$age > 0),
  ,
  drop = FALSE
]
data
NA
# Number of males vs females (1 = males; 2 = females)
table(data[["sex"]])
1 2
75737 81963
# Create a categorical age variable: decade bands with an inclusive lower
# bound (right = FALSE); everything below 20 is "15-19", 80 and above is "80+".
# The result is stored as character, not factor.
data$agecat <- as.character(
  cut(
    data$age,
    breaks = c(-Inf, 20, 30, 40, 50, 60, 70, 80, Inf),
    labels = c(
      "15-19", "20-29", "30-39", "40-49",
      "50-59", "60-69", "70-79", "80+"
    ),
    right = FALSE
  )
)
# Gender variable: recode 1 -> "male", 2 -> "female"
data$sex <- replace(data$sex, data$sex == 1, "male")
data$sex <- replace(data$sex, data$sex == 2, "female")
# Average age of participants
mean(data[["age"]])
[1] 41.62714
# Youngest and oldest participant
range(data[["age"]])
[1] 15 102
#risk taking Frequency
library(ggplot2)
# Distribution of the risk-taking item
ggplot(data, aes(x = risk_and_adventure)) +
  geom_histogram(binwidth = 0.5, fill = "lightblue", color = "black") +
  labs(x = "Risk Taking", y = "Frequency", title = "Histogram of Risk Taking") +
  theme_minimal()
# Age frequency.
# binwidth = 1: age appears to be recorded in whole years, so a 0.5-wide bin
# would leave every second bin empty and draw gaps between the bars.
ggplot(data, aes(x = age)) +
  geom_histogram(binwidth = 1, fill = "lightblue", color = "black") +
  labs(x = "Age", y = "Frequency", title = "Histogram of Age Distribution") +
  theme_minimal()
# Age vs risk taking: one box per age category
ggplot(data, aes(x = agecat, y = risk_and_adventure)) +
  geom_boxplot() +
  labs(
    title = "Boxplot of Risk and Adventure by Age",
    x = "Age",
    y = "Risk and Adventure"
  ) +
  theme_minimal()
NA
NA
# Sex vs risk taking: one box per sex
ggplot(data, aes(x = as.factor(sex), y = risk_and_adventure)) +
  geom_boxplot()
# Print the final data and a per-column summary
data
summary(data)
risk_and_adventure sex age country wave
Min. :1.000 Length:157700 Min. : 15.00 Min. : 12.0 Min. :5.000
1st Qu.:3.000 Class :character 1st Qu.: 28.00 1st Qu.:276.0 1st Qu.:5.000
Median :4.000 Mode :character Median : 39.00 Median :466.0 Median :6.000
Mean :3.794 Mean : 41.63 Mean :478.9 Mean :5.547
3rd Qu.:5.000 3rd Qu.: 54.00 3rd Qu.:710.0 3rd Qu.:6.000
Max. :6.000 Max. :102.00 Max. :894.0 Max. :6.000
country_lab agecat
Length:157700 Length:157700
Class :character Class :character
Mode :character Mode :character
```